Data analysis and visualisation
#Trips per month
uber_total %>%
group_by(Month) %>%
tally() %>%
ggplot(aes(x= Month , y= n)) +
scale_x_continuous(breaks = seq(4,9,1)) +
geom_col(fill="blue")+
labs(x= 'Month' ,
y= 'Trips' ,
title = 'Trips each month in 2014' ,
subtitle = 'There was a big rise in September') +
scale_y_continuous(labels = comma)

#Trips per hour
uber_total %>%
group_by(Hour) %>%
tally()%>%
ggplot(aes(x= Hour , y= n)) +
geom_col(fill="red")+
scale_x_continuous(breaks = seq(0,23,1)) +
labs(x= 'Hour' ,
y= 'Trips' ,
title = 'Trips each hour in 2014' ,
subtitle = 'Peak hours in 7-8 AM and 4-6 PM') +
scale_y_continuous(labels = comma)

#Adding day_of_the_week column
uber_total %>%
mutate(
day_of_the_week = case_when(
Weekday == 1 ~ 'Sunday' ,
Weekday == 2 ~ 'Monday' ,
Weekday == 3 ~ 'Tuesday' ,
Weekday == 4 ~ 'Wednesday' ,
Weekday == 5 ~ 'Thursday' ,
Weekday == 6 ~ 'Friday' ,
Weekday == 7 ~ 'Saturday'
)
) -> uber_total
#Adjusting week of the day to the right order for plots below
uber_total$day_of_the_week <- factor(uber_total$day_of_the_week, levels=c("Sunday","Saturday","Friday","Thursday", "Wednesday","Tuesday","Monday"))
#Hourly pickups by day of Week in 2014
uber_total %>%
group_by(Hour,day_of_the_week) %>%
tally()%>%
ggplot(mapping = aes(x=Hour, y=day_of_the_week, fill=n))+
geom_tile(colour='white') +
scale_x_continuous(breaks = seq(0,23,1)) +
theme_minimal()+
scale_fill_viridis(option = 'D') +
labs(x= 'Hour' ,
y= 'Weekday' ,
subtitle = 'Hourly pickups by day of Week in 2014' ,
fill = 'Number of pickups')

weekdaylist<-c("Monday","Tuesday","Wednesday","Thursday","Friday")
uber_total$Workday=ifelse(uber_total$day_of_the_week %in% weekdaylist ,"Weekday","Weekend")
uber_total %>%
group_by(Date,Workday)%>%
summarise(Trips = n())%>%
ggplot(mapping = aes(x=Date,y=Trips,color=Workday))+
scale_color_manual(values =c("royalblue4","red2"))+
geom_point()+geom_smooth(method=lm, fill='yellowgreen',
color='yellow1',se=FALSE,size=1, formula = y~x)+
ggtitle("Pickups:Weekdays vs. Weekends")+ylab("Total Pickups per Day")

#Pickup numbers change monthly per base
uber_total %>%
group_by(Month, Base) %>%
tally()%>%
ggplot(mapping = aes(x=Month, y=n, colour=Base))+
geom_line()+
theme_minimal()+
scale_y_continuous(labels = comma)+
labs(x= 'Month',
y = 'Pickup',
title = 'Pickup numbers change monthly per base',
subtitle = 'B02764 and B02617 see big rise since August')

#Hex Map of Geographical Location of Uber Rides
ggplot(data = uber_total , mapping= aes(x = Lat, y = Lon)) +
geom_hex(bins = 80) +
ggtitle('Hex Map of Geographical Location of Uber Rides') +
xlab('Latitude (Degrees North)') +
ylab('Longitude (Degrees East)') +
labs(subtitle = 'Most rides located around 40.7° N, 74° W')

#NY Uber Rides Map During 2014 Apr-Sep
min_lat <- 40.5774
max_lat <- 40.9176
min_long <- -74.15
max_long <- -73.7004
ggplot(uber_total, aes(x=Lon, y=Lat)) +
geom_point(size = 0.1 , colour= 'blue') +
scale_x_continuous(limits = c(min_long, max_long)) +
scale_y_continuous(limits = c(min_lat, max_lat)) +
theme_map() +
ggtitle("NY Uber Rides Map During 2014 Apr-Sep")+
labs(subtitle = 'Most rides located at Manhattan, Brooklyn, Queens')
## Warning: Removed 71701 rows containing missing values (geom_point).

#Choosing the Top 30 locations in 2014 by total volumes
library(leaflet)
uber_2014_top <- uber_total %>%
count(Lat, Lon, sort = TRUE) %>%
head(30)
#The Top 30 locations in 2014 by total volumes map
leaflet(uber_2014_top) %>%
addTiles() %>%
setView(-74.15, 40.91, zoom = 8) %>%
addMarkers(~Lon, ~Lat)
#Choosing the Top 30 locations in 2014 by total volumes in weekday morning peak hours
uber_morning_peak_weekday <- uber_total %>%
filter(Workday == 'Weekday') %>%
filter(Hour %in% c(7,8,9)) %>%
count(Lat, Lon, sort = TRUE) %>%
head(30)
#The Top 30 locations in 2014 by total volumes map in weekday morning peak hours
leaflet(uber_morning_peak_weekday) %>%
addTiles() %>%
setView(-74.15, 40.91, zoom = 8) %>%
addMarkers(~Lon, ~Lat)
#Choosing the Top 30 locations in 2014 by total volumes in weekday evening peak hours
uber_evening_peak_weekday <- uber_total %>%
filter(Workday == 'Weekday') %>%
filter(Hour %in% c(17,18,19,20)) %>%
count(Lat, Lon, sort = TRUE) %>%
head(30)
#The Top 30 locations in 2014 by total volumes map in weekday evening peak hours
leaflet(uber_evening_peak_weekday) %>%
addTiles() %>%
setView(-74.15, 40.91, zoom = 8) %>%
addMarkers(~Lon, ~Lat)
#Choosing the Top 30 locations in 2014 by total volumes in weekend party time
uber_party_1 <- uber_total %>%
filter(Workday == 'Weekend') %>%
filter(Hour %in% c(0,1,2,3)) %>%
count(Lat, Lon, sort = TRUE) %>%
head(30)
#The Top 30 locations in 2014 by total volumes map in weekend party time
leaflet(uber_party_1) %>%
addTiles() %>%
setView(-74.15, 40.91, zoom = 8) %>%
addMarkers(~Lon, ~Lat)
#Another version of party map
#To get the map of NY City
newyork <- get_map(location = c(-74.5477,40.4553,-73.1525,41.0618),source = "osm")
#Filtering the data of party time
uber_party_2 <- uber_total %>%
filter(Workday == 'Weekend') %>%
filter(Hour %in% c(0,1,2,3))
#Getting the map
ggmap(newyork)+
geom_point(data = uber_party_2, mapping = aes(x= Lon , y= Lat) ,alpha = 0.03)

#The Gif of NY Uber Rides Map Changes During Apr 2014
p <- ggplot(data=subset(uber_total,Month==4), aes(x=Lon, y=Lat)) +
geom_point(size = 0.1 , colour= 'blue') +
scale_x_continuous(limits = c(min_long, max_long)) +
scale_y_continuous(limits = c(min_lat, max_lat)) +
theme_map() +
labs(subtitle = 'Date:{frame_time}')+
ggtitle("NY Uber Rides Map Changes During Apr 2014")+
transition_time(Date)
p
